pacman::p_load(sf, tmap, sfdep, tidyverse, mapview)Take Home Exercise 1
Getting Started
Geospatial Data
busStops <- st_read(dsn = "data/geospatial",
layer = "BusStop") %>%
st_transform(crs = 3414)Reading layer `BusStop' from data source
`/Users/michaelberlian/Desktop/SMU Nov/ISSS624 GeoSpatial/ISSS624/Take-Home_Ex/Take-Home_Ex1/data/geospatial'
using driver `ESRI Shapefile'
Simple feature collection with 5161 features and 3 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: 3970.122 ymin: 26482.1 xmax: 48284.56 ymax: 52983.82
Projected CRS: SVY21
Plotting Bus Stop points
tmap_mode("view")tmap mode set to interactive viewing
tm_shape(busStops) +
tm_dots()Hexagonal Grid Creation
grid = st_make_grid(busStops, c(500), what = "polygons", square = FALSE)
# To sf and add grid ID
grid_sf = st_sf(grid) %>%
# add grid ID
mutate(grid_id = 1:length(lengths(grid)))grid_sf$n_colli = lengths(st_intersects(grid_sf, busStops))
# remove grid without value of 0 (i.e. no points in side that grid)
grid_count = filter(grid_sf,n_colli > 0 )tmap_mode("view")tmap mode set to interactive viewing
tm_shape(grid_count) +
tm_fill(
col = "n_colli",
palette = "Blues",
style = "cont",
title = "Number of collisions",
id = "grid_id",
showNA = FALSE,
alpha = 0.6,
popup.vars = c(
"Number of collisions: " = "n_colli"
),
popup.format = list(
n_colli = list(format = "f", digits = 0)
)
) +
tm_borders(col = "grey40", lwd = 0.7)removing bus stop grids outside of singapore
grid_count_rm <- grid_count %>%
filter(!grid_id == 1767,
!grid_id == 2073,
!grid_id == 2135,
!grid_id == 2104)Aspatial Data
busTrips <- read_csv("data/aspatial/origin_destination_bus_202308.csv")Rows: 5709512 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): YEAR_MONTH, DAY_TYPE, PT_TYPE, ORIGIN_PT_CODE, DESTINATION_PT_CODE
dbl (2): TIME_PER_HOUR, TOTAL_TRIPS
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# busTrips <- read_csv("data/aspatial/origin_destination_bus_202309.csv")
# busTrips <- read_csv("data/aspatial/origin_destination_bus_202310.csv")
busTrips$ORIGIN_PT_CODE <- as.factor(busTrips$ORIGIN_PT_CODE)
busTrips$DESTINATION_PT_CODE <- as.factor(busTrips$DESTINATION_PT_CODE)Data wrangling
Aspatial Data Wrangling
calculating bus trip in weekday and morning peak
busTripsDayMorning <- busTrips %>%
filter(DAY_TYPE == "WEEKDAY",
TIME_PER_HOUR >= 6,
TIME_PER_HOUR <= 9) %>%
group_by(ORIGIN_PT_CODE) %>%
summarise(WeekdayMorningTrips = sum(TOTAL_TRIPS))calculating bus trip in weekday and afternoon peak
busTripsDayAfternoon <- busTrips %>%
filter(DAY_TYPE == "WEEKDAY",
TIME_PER_HOUR >= 17,
TIME_PER_HOUR <= 20) %>%
group_by(ORIGIN_PT_CODE) %>%
summarise(WeekdayAfternoonTrips = sum(TOTAL_TRIPS))calculating bus trip in weekend and morning peak
busTripsEndMorning <- busTrips %>%
filter(DAY_TYPE == "WEEKENDS/HOLIDAY",
TIME_PER_HOUR >= 11,
TIME_PER_HOUR <= 14) %>%
group_by(ORIGIN_PT_CODE) %>%
summarise(WeekendMorningTrips = sum(TOTAL_TRIPS))calculating bus trip in weekend and evening peak
busTripsEndEvening <- busTrips %>%
filter(DAY_TYPE == "WEEKENDS/HOLIDAY",
TIME_PER_HOUR >= 16,
TIME_PER_HOUR <= 19) %>%
group_by(ORIGIN_PT_CODE) %>%
summarise(WeekendEveningTrips = sum(TOTAL_TRIPS))Combining the Peak Trips
BusTrips_comb <- busTripsDayMorning %>%
left_join(busTripsDayAfternoon) %>%
left_join(busTripsEndMorning) %>%
left_join(busTripsEndEvening)Joining with `by = join_by(ORIGIN_PT_CODE)`
Joining with `by = join_by(ORIGIN_PT_CODE)`
Joining with `by = join_by(ORIGIN_PT_CODE)`
Geospatial Data Wrangling
grid_bus <- st_join(grid_count_rm,busStops,join = st_contains) %>%
unnest() %>%
select(grid_id,BUS_STOP_N)Warning: `cols` is now required when using `unnest()`.
ℹ Please use `cols = c(grid)`.
grid_bus$BUS_STOP_N <- as.factor(grid_bus$BUS_STOP_N)Joining aspatial and geospatial
Trips <- left_join(BusTrips_comb,grid_bus,
by = c("ORIGIN_PT_CODE" = "BUS_STOP_N")) %>%
group_by(grid_id)%>%
summarise(WeekdayMorningTrips = sum(WeekdayMorningTrips),
WeekdayAfternoonTrips = sum(WeekdayAfternoonTrips),
WeekendMorningTrips = sum(WeekendMorningTrips),
WeekendEveningTrips = sum(WeekendEveningTrips))Trips <- left_join(grid_count_rm,Trips) %>%
mutate (Total_Trips = WeekdayMorningTrips+WeekdayAfternoonTrips+WeekendMorningTrips +WeekendEveningTrips) %>%
rename (n_bus = n_colli)Joining with `by = join_by(grid_id)`
Trip per bus stop, to see if the trip amount is caused by number of bus station or is it really packed
TripsPerBusStop <- Trips %>%
mutate (WeekdayMorningTrips = WeekdayMorningTrips/n_bus,
WeekdayAfternoonTrips = WeekdayAfternoonTrips/n_bus,
WeekendMorningTrips = WeekendMorningTrips/n_bus,
WeekendEveningTrips = WeekendEveningTrips/n_bus)TripsLog <- Trips %>%
mutate (WeekdayMorningTrips = log(WeekdayMorningTrips),
WeekdayAfternoonTrips = log(WeekdayAfternoonTrips),
WeekendMorningTrips = log(WeekendMorningTrips),
WeekendEveningTrips = log(WeekendEveningTrips))Visualising
tmap_mode("view")tmap mode set to interactive viewing
tm_shape(Trips) +
tm_fill(
col = "Total_Trips",
palette = "Blues",
style = "quantile",
title = "Total Trips",
id = "grid_id",
showNA = FALSE,
alpha = 0.6,
popup.format = list(
Total_Trips = list(format = "f", digits = 0)
)
) +
tm_borders(col = "grey40", lwd = 0.7)Warning: popup.vars not specified whereas popup.format is a list
The plot above are from total bus trip per origin bus station.
From the plot of total trips above, can be seen that bus trips are spreaded around across the country. However, there are several clusters can be seen.
Total Trips
weekday_morning <- tm_shape(Trips) +
tm_fill(
col = "WeekdayMorningTrips",
palette = "Blues",
style = "quantile",
title = "Weekday Morning Trips",
id = "grid_id",
showNA = FALSE,
alpha = 0.6
) +
tm_borders(col = "grey40", lwd = 0.7) +
tm_legend(scale = 0.5)weekday_afternoon <- tm_shape(Trips) +
tm_fill(
col = "WeekdayAfternoonTrips",
palette = "Blues",
style = "quantile",
title = "Weekday Afternoon Trips",
id = "grid_id",
showNA = FALSE,
alpha = 0.6,
) +
tm_borders(col = "grey40", lwd = 0.7) +
tm_legend(scale = 0.5)weekend_morning <- tm_shape(Trips) +
tm_fill(
col = "WeekendMorningTrips",
palette = "Blues",
style = "quantile",
title = "Weekend Morning Trips",
id = "grid_id",
showNA = FALSE,
alpha = 0.6
) +
tm_borders(col = "grey40", lwd = 0.7) +
tm_legend(scale = 0.5)weekend_evening <- tm_shape(Trips) +
tm_fill(
col = "WeekendEveningTrips",
palette = "Blues",
style = "quantile",
title = "Weekend Evening Trips",
id = "grid_id",
showNA = FALSE,
alpha = 0.6
) +
tm_borders(col = "grey40", lwd = 0.7) +
tm_legend(scale = 0.5)tmap_mode("plot")tmap mode set to plotting
tmap_arrange(weekday_morning, weekday_afternoon, weekend_morning, weekend_evening,
ncol=2, nrow=2)
The plot above are segragated trips divided into 4 categories: Weekday morning, Weekday afternoon, Weekend morning, and weekend evening. There are only minimal differences between them. However a noticable different is on the east side. The east side quantile is lower on the evening or afternoon than in the morning both on weekend and weekday. Also, in the central can be seen that they have a higher quantile during the afternoon and evening.
Total Trips per Bus Stop
weekday_morning_per_stop <- tm_shape(TripsPerBusStop) +
tm_fill(
col = "WeekdayMorningTrips",
palette = "Blues",
style = "quantile",
title = "Weekday Morning Trips",
id = "grid_id",
showNA = FALSE,
alpha = 0.6
) +
tm_borders(col = "grey40", lwd = 0.7) +
tm_legend(scale = 0.5)weekday_afternoon_per_stop <- tm_shape(TripsPerBusStop) +
tm_fill(
col = "WeekdayAfternoonTrips",
palette = "Blues",
style = "quantile",
title = "Weekday Afternoon Trips",
id = "grid_id",
showNA = FALSE,
alpha = 0.6,
) +
tm_borders(col = "grey40", lwd = 0.7) +
tm_legend(scale = 0.5)weekend_morning_per_stop <- tm_shape(TripsPerBusStop) +
tm_fill(
col = "WeekendMorningTrips",
palette = "Blues",
style = "quantile",
title = "Weekend Morning Trips",
id = "grid_id",
showNA = FALSE,
alpha = 0.6
) +
tm_borders(col = "grey40", lwd = 0.7) +
tm_legend(scale = 0.5)weekend_evening_per_stop <- tm_shape(TripsPerBusStop) +
tm_fill(
col = "WeekendEveningTrips",
palette = "Blues",
style = "quantile",
title = "Weekend Evening Trips",
id = "grid_id",
showNA = FALSE,
alpha = 0.6
) +
tm_borders(col = "grey40", lwd = 0.7) +
tm_legend(scale = 0.5)tmap_mode("plot")tmap mode set to plotting
tmap_arrange(weekday_morning_per_stop, weekday_afternoon_per_stop,
weekend_morning_per_stop, weekend_evening_per_stop,
ncol=2, nrow=2) 
the plot above shows the differences of total number of trips based of the time of day and the day. The number of total trip in a hexagon are divided per each number of bus station in the hexagon. However, the result are not change by a lot as well.
Log value of trips
weekday_morning_log <- tm_shape(TripsLog) +
tm_fill(
col = "WeekdayMorningTrips",
palette = "Blues",
style = "quantile",
title = "Weekday Morning Trips per Bus Stop",
id = "grid_id",
showNA = FALSE,
alpha = 0.6
) +
tm_borders(col = "grey40", lwd = 0.7) +
tm_legend(scale = 0.5)weekday_afternoon_log <- tm_shape(TripsLog) +
tm_fill(
col = "WeekdayAfternoonTrips",
palette = "Blues",
style = "quantile",
title = "Weekday Afternoon Trips",
id = "grid_id",
showNA = FALSE,
alpha = 0.6,
) +
tm_borders(col = "grey40", lwd = 0.7) +
tm_legend(scale = 0.5)weekend_morning_log <- tm_shape(TripsLog) +
tm_fill(
col = "WeekendMorningTrips",
palette = "Blues",
style = "quantile",
title = "Weekend Morning Trips",
id = "grid_id",
showNA = FALSE,
alpha = 0.6
) +
tm_borders(col = "grey40", lwd = 0.7) +
tm_legend(scale = 0.5)weekend_evening_log <- tm_shape(TripsLog) +
tm_fill(
col = "WeekendEveningTrips",
palette = "Blues",
style = "quantile",
title = "Weekend Evening Trips",
id = "grid_id",
showNA = FALSE,
alpha = 0.6
) +
tm_borders(col = "grey40", lwd = 0.7) +
tm_legend(scale = 0.5)tmap_mode("plot")tmap mode set to plotting
tmap_arrange(weekday_morning_log, weekday_afternoon_log,
weekend_morning_log, weekend_evening_log,
ncol=2, nrow=2)
The above plot shows the value of total number of trip per hexagon after applying log to the number. There are also minimal differences between them.
Local Indicators of Spatial Association (LISA) Analysis
Spatial Weight
wm_idw <- Trips %>%
mutate(nb = st_contiguity(grid),
wts = st_inverse_distance(nb, grid,
scale = 1,
alpha = 1),
.before = 1) %>%
mutate(grid_id = 1:length(Trips$grid_id))! Polygon provided. Using point on surface.
# wm_idw$WeekdayMorningTrips <- ifelse(is.na(wm_idw$WeekdayMorningTrips),
# 0.99,wm_idw$WeekdayMorningTrips)Local Moran
Weekday Morning
lisa <- wm_idw %>%
mutate(local_moran = local_moran(
WeekdayMorningTrips, nb, wts, nsim = 99, na.action=na.pass),
.before = 1) %>%
unnest(local_moran)Warning: There were 2 warnings in `stopifnot()`.
The first warning was:
ℹ In argument: `local_moran = local_moran(WeekdayMorningTrips, nb, wts, nsim =
99, na.action = na.pass)`.
Caused by warning in `lag.listw()`:
! NAs in lagged values
ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
ii_val_moran <- tm_shape(lisa) +
tm_fill("ii") +
tm_borders(alpha = 0.5) +
tm_view(set.zoom.limits = c(6,8)) +
tm_layout(main.title = "local Moran's I of Trip",
main.title.size = 0.8)p_val_moran <- tm_shape(lisa) +
tm_fill("p_ii_sim") +
tm_borders(alpha = 0.5) +
tm_layout(main.title = "p-value of local Moran's I",
main.title.size = 0.8)tmap_arrange(ii_val_moran, p_val_moran, ncol = 2)Variable(s) "ii" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

lisa_sig <- lisa %>%
filter(p_ii_sim < 0.05)
pal <- RColorBrewer::brewer.pal("Set1", n = 5)
tm_shape(lisa) +
tm_polygons() +
tm_borders(alpha = 0.5) +
tm_shape(lisa_sig) +
tm_fill("mean", palette = pal ) +
tm_borders(alpha = 0.4)Warning: One tm layer group has duplicated layer types, which are omitted. To
draw multiple layers of the same type, use multiple layer groups (i.e. specify
tm_shape prior to each of them).
